import json
import os


def read_jsonl(path):
    """Lazily yield one decoded JSON value per non-blank line of a file.

    The file is opened only when iteration starts and is closed once the
    generator is exhausted or closed.

    Args:
        path: Path of a UTF-8 encoded JSON Lines file (one JSON document
            per line).

    Yields:
        The Python object decoded from each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration).
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # 'utf-8-sig' decodes plain UTF-8 identically but also drops a leading
    # byte-order mark, which json.loads would otherwise reject.
    with open(path, 'r', encoding='utf-8-sig') as f:
        # Text mode already normalises '\r\n' to '\n', so stripping surrounding
        # whitespace is enough to remove any line terminator.
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no record.
                continue
            yield json.loads(line)


if __name__ == '__main__':

    def main():
        """Convert the first records of a JSONL dataset into conversation form.

        Reads records from the hard-coded source file and writes one JSON
        object per line to ``converted_ft_data.json`` in the same directory.
        Each source record must provide the keys ``id``, ``reference``,
        ``input`` and ``output``; a missing key raises ``KeyError``.
        """
        # Local imports keep this script-only entry point self-contained.
        from contextlib import closing
        from itertools import islice

        limit = 5  # Only convert the first few records; 0 means no limit.
        path = r'D:\_dell7590_root\sync\1_usb\M2\main\sci\com\ai\teach in baway\帮扶\wei shao lun\20240228Wed\DISC-Law-SFT-Triplet-released.jsonl'
        out_dir = os.path.dirname(os.path.abspath(path))
        target_path = os.path.join(out_dir, 'converted_ft_data.json')
        print(f'Reading from {path}')
        print(f'Write limit={limit} to file {target_path}')
        # closing() releases the source file as soon as we stop reading,
        # even when the limit cuts iteration short.
        with closing(read_jsonl(path)) as records, \
                open(target_path, 'w', encoding='utf8', newline='\r\n') as f:
            # islice stops after `limit` records without reading the next
            # one; `limit or None` turns 0 into "no limit".
            for data in islice(records, limit or None):
                # presumably 'reference' is a list of strings; they are
                # concatenated without a separator -- confirm against data.
                reference_text = "".join(data['reference'])
                new_data = {
                    "id": data["id"],
                    "conversations": [
                        {
                            "from": f"参考法律：{reference_text}。{data['input']} ",
                            "value": data["output"],
                        }
                    ],
                }
                json.dump(new_data, f, ensure_ascii=False)
                # newline='\r\n' on the file turns this into CRLF on disk.
                f.write('\n')

    main()

